#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Load packages
library(dplyr)
library(readxl)
library(stringr)
library(readr)
library(reshape2)
library(fuzzyjoin)
## zipcode is a currently archived package
#library(devtools)
#install_version('zipcode', version = '1.0')
#install_github("cran/zipcode")
library(zipcode)
library(zipcodeR)
library(geosphere)
library(foreign)
library(tidycensus)
library(here)

#FIPS
fips <- tidycensus::fips_codes
fips$fips <- as.numeric(paste0(fips$state_code, fips$county_code))
county_fips <- subset(fips, select = c(fips, county, state))
names(county_fips)[2] <- "name"
state_fips <- read.table(here("data", "input", "eiagenerator", "state_fips_master.csv"), sep=",", header=TRUE)
# Clean state_fips
state_fips <- state_fips[,c("state_name","state_abbr","fips")]
# Create County FIP
county_fips_sub <- county_fips %>% mutate_at("name", str_replace, "County", "")

#County centers----
gps_county <- read_csv(here("data", "input", "eiagenerator", "county_centers.csv"))
# CITATION: https://github.com/btskinner/spatial/blob/master/data/county_centers.csv
gps_county <- gps_county[,c("fips","clon00","clat00")]

#Zipcode data
data(zipcode)
zip_code_db_sub <- subset(zip_code_db, select = c(zipcode,lat,lng))

# 1992
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "TYPE392.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PLNT92.xls"))

# Year, FIPS Columns
dt_1992 <- county_fips
dt_1992 <- dt_1992[complete.cases(dt_1992),]
dt_1992$Year <- "1992"
dt_1992 <- dt_1992[,-3]
colnames(dt_1992) <- c("FIPS","County","Year")

# Create County FIP
plant <- merge(state_fips, plant, by.x = "state_abbr", by.y = "PLNTST")
plant$CTYCODE <- str_pad(plant$CTYCODE, width=3, side="left", pad="0")
plant$FIPS <- paste0(plant$fips, plant$CTYCODE)

# Remove Unnecessary Columns in plant
plant <- plant[, c("FIPS","state_abbr","state_name","UTILCODE","PLNTCODE", "PLNTNAME","CTYNAME","PLNTZIP")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(OPENSRC1 == "BFG" ~ "Gas",
                                             OPENSRC1 == "GAS" ~ "Gas",
                                             OPENSRC1 == "LNG" ~ "Gas",
                                             OPENSRC1 == "LPG" ~ "Gas",
                                             OPENSRC1 == "NG" ~ "Gas",
                                             OPENSRC1 == "RG" ~ "Gas",
                                             OPENSRC1 == "SNG" ~ "Gas",
                                             OPENSRC1 == "ANT" ~ "Coal",
                                             OPENSRC1 == "BIT" ~ "Coal",
                                             OPENSRC1 == "COL" ~ "Coal",
                                             OPENSRC1 == "CWM" ~ "Coal",
                                             OPENSRC1 == "LIG" ~ "Coal",
                                             OPENSRC1 == "SUB" ~ "Coal",
                                             OPENSRC1 == "SUN" ~ "Sun",
                                             OPENSRC1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","INSERVMTH","INSERVYR","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = UTILCODE,
    `Plant Code` = PLNTCODE,
    `state_abbr` = `state_abbr`,
    `state_name` = `state_name`,
    `Plant Name` = PLNTNAME,
    `County Name` = CTYNAME,
    `Plant Zip Code` = PLNTZIP,
    `In-Service Month` = INSERVMTH,
    `In-Service Year` = INSERVYR
  )
head(dt_ref)

# Tweak Dataset
dt_ref <- subset(dt_ref, `In-Service Year`%in%c('1992',"1991","1990","1989"))
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=9, side="left", pad="0")
dt_ref$`Plant Zip Code` = substr(dt_ref$`Plant Zip Code`,1,5)
dt_ref <- distinct(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zip_code_db_sub, by.x = "Plant Zip Code", by.y = "zipcode", all.x = TRUE)

subset(dt_ref, is.na(lat))
#Yolo County California
dt_ref[dt_ref$`Plant Zip Code`=="00000",]$lat <- 38.7646
dt_ref[dt_ref$`Plant Zip Code`=="00000",]$lng <- -121.9018
#New Castle County Delaware
dt_ref[dt_ref$`Plant Zip Code`=="19899",]$lat <- 39.5393
dt_ref[dt_ref$`Plant Zip Code`=="19899",]$lng <- -75.6674
#Charleston County, South Carolina
dt_ref[dt_ref$`Plant Zip Code`=="29402",]$lat <- 32.7957
dt_ref[dt_ref$`Plant Zip Code`=="29402",]$lng <- -79.7848
#Kay County, Oklahoma
dt_ref[dt_ref$`Plant Zip Code`=="74603",]$lat <- 36.8234
dt_ref[dt_ref$`Plant Zip Code`=="74603",]$lng <- -97.1790
#Lubbock County, Texas
dt_ref[dt_ref$`Plant Zip Code`=="79409",]$lat <- 33.5846
dt_ref[dt_ref$`Plant Zip Code`=="79409",]$lng <- -101.8456

subset(dt_ref, is.na(lat))

# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]

# Read Year Data
dt_1992 <- readRDS(here("data", "inter", "eiagenerator", "1992.rds"))
dt_1992$FIPS <- str_pad(dt_1992$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_1992 <- merge(gps_county, dt_1992, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_1992$NewGasDistance <- 0
dt_1992$NewCoalDistance <- 0
dt_1992$NewSunDistance <- 0
dt_1992$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_1992)) {
  dt_ref$dist <- distVincentyEllipsoid(dt_1992[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1992$NewGasDistance[i] <- NA
  } else {
    dt_1992$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1992$NewCoalDistance[i] <- NA
  } else {
    dt_1992$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1992$NewSunDistance[i] <- NA
  } else {
    dt_1992$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1992$NewWindDistance[i] <- NA
  } else {
    dt_1992$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}

dt_1992 <- dt_1992[ , -which(names(dt_1992) %in% c("clon00","clat00"))]
names(dt_1992)[names(dt_1992) == 'fips'] <- 'FIPS'
head(dt_1992)

saveRDS(dt_1992, here("data", "inter", "eiagenerator", "completed", "1992.rds"))
rm(list = c("dt_1992", "dt_ref", "gen", "plant"))


#1996
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "TYPE3Y96.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PLANTY96.xls"))

# Year, FIPS Columns
dt_1996 <- county_fips
dt_1996 <- dt_1996[complete.cases(dt_1996),]
dt_1996$Year <- "1996"
dt_1996 <- dt_1996[,-3]
colnames(dt_1996) <- c("FIPS","County","Year")

# Create County FIP
plant <- merge(state_fips, plant, by.x = "state_abbr", by.y = "PLNTST")
plant$CTYCODE <- str_pad(plant$CTYCODE, width=3, side="left", pad="0")
plant$FIPS <- paste0(plant$fips, plant$CTYCODE)

# Remove Unnecessary Columns in plant
plant <- plant[, c("FIPS","state_abbr","state_name","UTILCODE","PLNTCODE", "PLNTNAME","CTYNAME","PLNTZIP")]

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(ENERGYSCE1 == "BFG" ~ "Gas",
                                             ENERGYSCE1 == "GAS" ~ "Gas",
                                             ENERGYSCE1 == "LNG" ~ "Gas",
                                             ENERGYSCE1 == "LPG" ~ "Gas",
                                             ENERGYSCE1 == "NG" ~ "Gas",
                                             ENERGYSCE1 == "RG" ~ "Gas",
                                             ENERGYSCE1 == "SNG" ~ "Gas",
                                             ENERGYSCE1 == "ANT" ~ "Coal",
                                             ENERGYSCE1 == "BIT" ~ "Coal",
                                             ENERGYSCE1 == "COL" ~ "Coal",
                                             ENERGYSCE1 == "CWM" ~ "Coal",
                                             ENERGYSCE1 == "LIG" ~ "Coal",
                                             ENERGYSCE1 == "SUB" ~ "Coal",
                                             ENERGYSCE1 == "SUN" ~ "Sun",
                                             ENERGYSCE1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","INSERVMTH","INSERVYR","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = UTILCODE,
    `Plant Code` = PLNTCODE,
    `state_abbr` = `state_abbr`,
    `state_name` = `state_name`,
    `Plant Name` = PLNTNAME,
    `County Name` = CTYNAME,
    `Plant Zip Code` = PLNTZIP,
    `In-Service Month` = INSERVMTH,
    `In-Service Year` = INSERVYR
  )
head(dt_ref)

# Tweak Dataset
#dt_ref <- subset(dt_ref, `In-Service Year`=='1996')
dt_ref <- subset(dt_ref, `In-Service Year`%in%c("1996","1995","1994","1993"))
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=9, side="left", pad="0")
dt_ref$`Plant Zip Code` = substr(dt_ref$`Plant Zip Code`,1,5)
dt_ref <- distinct(dt_ref)
dim(dt_ref)

# Geocode ZIP Code

dt_ref <- merge(dt_ref, zip_code_db_sub, by.x = "Plant Zip Code", by.y = "zipcode", all.x = TRUE)

##Kay County, Okalahoma
dt_ref[dt_ref$`Plant Code`%in%c(7545,7546),]$lat <- 36.8234
dt_ref[dt_ref$`Plant Code`%in%c(7545,7546),]$lng <- -97.1790
##Harford County, Maryland
dt_ref[dt_ref$`Plant Code`%in%c(1556),]$lat <- 39.5839
dt_ref[dt_ref$`Plant Code`%in%c(1556),]$lng <- -76.3637
##Harford County, Maryland
dt_ref[dt_ref$`Plant Code`%in%c(7690),]$lat <- 39.8626
dt_ref[dt_ref$`Plant Code`%in%c(7690),]$lng <- -75.6266
#Alameda, California
dt_ref[dt_ref$`Plant Code`%in%c(7691),]$lat <- 37.6017
dt_ref[dt_ref$`Plant Code`%in%c(7691),]$lng <- -121.7195
#Fresno, California
dt_ref[dt_ref$`Plant Code`%in%c(7099),]$lat <- 36.9859
dt_ref[dt_ref$`Plant Code`%in%c(7099),]$lng <- -119.2321
#New Haven, Connecticut
dt_ref[dt_ref$`Plant Code`%in%c(544),]$lat <- 41.3267
dt_ref[dt_ref$`Plant Code`%in%c(544),]$lng <- -72.8043
#Hunterdon, New Jersey
dt_ref[dt_ref$`Plant Code`%in%c(2393),]$lat <- 40.5670
dt_ref[dt_ref$`Plant Code`%in%c(2393),]$lng <- -74.9209
#Berkeley, South Carolina
dt_ref[dt_ref$`Plant Code`%in%c(130),]$lat <- 33.1261
dt_ref[dt_ref$`Plant Code`%in%c(130),]$lng <- -80.0088
#Lincoln, North Carolina
dt_ref[dt_ref$`Plant Code`%in%c(7277),]$lat <- 35.4637
dt_ref[dt_ref$`Plant Code`%in%c(7277),]$lng <- -81.2078
#Montgomery, Maryland
dt_ref[dt_ref$`Plant Code`%in%c(1572),]$lat <- 39.1547
dt_ref[dt_ref$`Plant Code`%in%c(1572),]$lng <- -77.2405
#Orangeburg, South Carolina
dt_ref[dt_ref$`Plant Code`%in%c(7210),]$lat <- 33.5635
dt_ref[dt_ref$`Plant Code`%in%c(7210),]$lng <- -81.0755
#Effingham, Georgia
dt_ref[dt_ref$`Plant Code`%in%c(6124),]$lat <- 32.3781
dt_ref[dt_ref$`Plant Code`%in%c(6124),]$lng <- -81.3839
#Jackson, Mississippi
dt_ref[dt_ref$`Plant Code`%in%c(2047),]$lat <- 32.2988
dt_ref[dt_ref$`Plant Code`%in%c(2047),]$lng <- -90.1848
#Greene County, Alabama
dt_ref[dt_ref$`Plant Code`%in%c(10),]$lat <- 32.9718
dt_ref[dt_ref$`Plant Code`%in%c(10),]$lng <- -87.9335
#Houston County, Georgia
dt_ref[dt_ref$`Plant Code`%in%c(7348),]$lat <- 32.4220
dt_ref[dt_ref$`Plant Code`%in%c(7348),]$lng <- -83.6348
#Jeffereson County, Wisconsin
dt_ref[dt_ref$`Plant Code`%in%c(7159),]$lat <- 43.0899
dt_ref[dt_ref$`Plant Code`%in%c(7159),]$lng <- -88.7109
#Kenosha County, Wisconsin
dt_ref[dt_ref$`Plant Code`%in%c(7270),]$lat <- 42.5663
dt_ref[dt_ref$`Plant Code`%in%c(7270),]$lng <- -87.6581
#Marinette County, Wisconsin
dt_ref[dt_ref$`Plant Code`%in%c(4076),]$lat <- 45.3582
dt_ref[dt_ref$`Plant Code`%in%c(4076),]$lng <- -88.0650
#Fond Du Lac County, Wisconsin
dt_ref[dt_ref$`Plant Code`%in%c(7203),]$lat <- 43.7008
dt_ref[dt_ref$`Plant Code`%in%c(7203),]$lng <- -88.2461
#Tazewell, Illinois
dt_ref[dt_ref$`Plant Code`%in%c(7384),]$lat <- 40.5522
dt_ref[dt_ref$`Plant Code`%in%c(7384),]$lng <- -89.4742
#Palo Pinto, Texas
dt_ref[dt_ref$`Plant Code`%in%c(3628),]$lat <- 32.7680
dt_ref[dt_ref$`Plant Code`%in%c(3628),]$lng <- -98.2988
#Cameron County, Texas
dt_ref[dt_ref$`Plant Code`%in%c(3559),]$lat <- 26.1285
dt_ref[dt_ref$`Plant Code`%in%c(3559),]$lng <- -97.5247
#Harris County, Texas
dt_ref[dt_ref$`Plant Code`%in%c(7325),]$lat <- 29.7752
dt_ref[dt_ref$`Plant Code`%in%c(7325),]$lng <- -95.3103
#Orangeburg County, South Carolina
dt_ref[dt_ref$`Plant Code`%in%c(7480),]$lat <- 33.5635
dt_ref[dt_ref$`Plant Code`%in%c(7480),]$lng <- -81.0755
#Bremer County, Iowa
dt_ref[dt_ref$`Plant Code`%in%c(7388),]$lat <- 42.7611
dt_ref[dt_ref$`Plant Code`%in%c(7388),]$lng <- -92.3814
#Lafayette County, Missouri
dt_ref[dt_ref$`Plant Code`%in%c(2131),]$lat <- 39.0031
dt_ref[dt_ref$`Plant Code`%in%c(2131),]$lng <- -93.9878
#Jasper County, Missouri
dt_ref[dt_ref$`Plant Code`%in%c(7296),]$lat <- 37.1746
dt_ref[dt_ref$`Plant Code`%in%c(7296),]$lng <- -94.3565
#Saline County, Missouri
dt_ref[dt_ref$`Plant Code`%in%c(2144),]$lat <- 39.1944
dt_ref[dt_ref$`Plant Code`%in%c(2144),]$lng <- -93.1780
#Pottawatomie County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(1328),]$lat <- 39.3976
dt_ref[dt_ref$`Plant Code`%in%c(1328),]$lng <- -96.3226
#Kingman County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(1296),]$lat <- 37.6072
dt_ref[dt_ref$`Plant Code`%in%c(1296),]$lng <- -98.2213
#Osborne County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(1315),]$lat <- 39.4051
dt_ref[dt_ref$`Plant Code`%in%c(1315),]$lng <- -98.7481
#Russell County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(1319),]$lat <- 38.9095
dt_ref[dt_ref$`Plant Code`%in%c(1319),]$lng <- -98.7481
#Clay County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(1270),]$lat <- 39.3479
dt_ref[dt_ref$`Plant Code`%in%c(1270),]$lng <- -97.1790
#Sherman County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(1280),]$lat <- 39.2982
dt_ref[dt_ref$`Plant Code`%in%c(1280),]$lng <- -101.7980
#Stanton County, Kansas
dt_ref[dt_ref$`Plant Code`%in%c(6579),]$lat <- 37.6390
dt_ref[dt_ref$`Plant Code`%in%c(6579),]$lng <- -101.7980
#Sacramento County, California
dt_ref[dt_ref$`Plant Code`%in%c(6579,7523,7524),]$lat <- 38.4747
dt_ref[dt_ref$`Plant Code`%in%c(6579,7523,7524),]$lng <- -121.3542
#Sacramento County, California
dt_ref[dt_ref$`Plant Code`%in%c(6579,7523,7524),]$lat <- 38.4747
dt_ref[dt_ref$`Plant Code`%in%c(6579,7523,7524),]$lng <- -121.3542
#St Louis County, Minnesota
dt_ref[dt_ref$`Plant Code`%in%c(1979),]$lat <- 47.7395
dt_ref[dt_ref$`Plant Code`%in%c(1979),]$lng <- -92.3624
#Alachua County, Florida
dt_ref[dt_ref$`Plant Code`%in%c(7345),]$lat <- 29.6580
dt_ref[dt_ref$`Plant Code`%in%c(7345),]$lng <- -82.3018
#Martin County, Florida
dt_ref[dt_ref$`Plant Code`%in%c(6043),]$lat <- 27.0805
dt_ref[dt_ref$`Plant Code`%in%c(6043),]$lng <- -80.4104
#Minnehaha County, South Dakota
dt_ref[dt_ref$`Plant Code`%in%c(7237),]$lat <- 43.6632
dt_ref[dt_ref$`Plant Code`%in%c(7237),]$lng <- -96.8351

dt_ref <- dt_ref[complete.cases(dt_ref),]

# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]

# Read Year Data
dt_1996 <- readRDS(here("data", "inter", "eiagenerator", "1996.rds"))
dim(dt_1996)
dt_1996$Year <- "1996"
dt_1996$FIPS <- str_pad(dt_1996$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_1996 <- merge(gps_county, dt_1996, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_1996$NewGasDistance <- 0
dt_1996$NewCoalDistance <- 0
dt_1996$NewSunDistance <- 0
dt_1996$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_1996)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_1996[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1996$NewGasDistance[i] <- NA
  } else {
    dt_1996$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1996$NewCoalDistance[i] <- NA
  } else {
    dt_1996$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1996$NewSunDistance[i] <- NA
  } else {
    dt_1996$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_1996$NewWindDistance[i] <- NA
  } else {
    dt_1996$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}
summary(dt_1996$NewGasDistance/1609.344)

dt_1996 <- dt_1996[ , -which(names(dt_1996) %in% c("clon00","clat00"))]
names(dt_1996)[names(dt_1996) == 'fips'] <- 'FIPS'
dim(dt_1996)
saveRDS(dt_1996, here("data", "inter", "eiagenerator", "completed", "1996.rds"))
rm(list = c("dt_1996", "dt_ref", "gen", "plant"))

# 2000-----
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "ExistingGenerators2000.xls"))

# Year, FIPS Columns
dt_2000 <- county_fips
dt_2000 <- dt_2000[complete.cases(dt_2000),]
dt_2000$Year <- "2000"
dt_2000 <- dt_2000[,-3]
colnames(dt_2000) <- c("FIPS","County","Year")

#load fuzzy-matched plant data
plant <- readRDS(here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2000.rds"))

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(EXISTING_ENERGY_SOURCE_1 == "BFG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "GAS" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "LNG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "LPG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "NG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "RG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "SNG" ~ "Gas",
                                             EXISTING_ENERGY_SOURCE_1 == "ANT" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "BIT" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "COL" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "CWM" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "LIG" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "SUB" ~ "Coal",
                                             EXISTING_ENERGY_SOURCE_1 == "SUN" ~ "Sun",
                                             EXISTING_ENERGY_SOURCE_1 == "WND" ~ "Wind"))
gen <- gen %>%
  dplyr::rename(
    UTILCODE = EIA_UTILITY_CODE,
    PLNTCODE = PLANT_CODE,
    INSVYEAR = OPERATING_MONTH,
    INSVMONTH = OPERATING_YEAR
  )
gen <- gen[,c("UTILCODE","PLNTCODE","INSVYEAR","INSVMONTH","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dim(dt_ref)
dt_ref <- dt_ref[complete.cases(dt_ref),]
dim(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = UTILCODE,
    `Plant Code` = PLNTCODE,
    `state_abbr` = PLNTSTATE,
    `Plant Name` = PLNTNAME,
    `County Name` = CNTYNAME,
    `Plant Zip Code` = PLANT_ZIP5,
    `In-Service Month` = INSVMONTH,
    `In-Service Year` = INSVYEAR
  )
head(dt_ref)
dim(dt_ref)

# Adjust
dt_ref <- subset(dt_ref, `In-Service Year`%in%c("2000","1999","1998","1997"))
dim(dt_ref)
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=5, side="left", pad="0")
dt_ref$`In-Service Year` <- NULL
dt_ref$`In-Service Month` <- NULL
dt_ref <- distinct(dt_ref)
dim(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zipcode, by.x='Plant Zip Code', by.y='zip', all.x=TRUE)
dt_ref <- dt_ref %>% 
  dplyr::rename(
    lat = latitude,
    lng = longitude,
  )
subset(dt_ref, is.na(lat))

dt_ref[dt_ref$`Plant Zip Code`=="40300",]$lat <- 37.8259
dt_ref[dt_ref$`Plant Zip Code`=="40300",]$lng <- -84.8985
dt_ref[dt_ref$`Plant Zip Code`=="40400",]$lat <- 37.9599
dt_ref[dt_ref$`Plant Zip Code`=="40400",]$lng <- -84.1435
dt_ref[dt_ref$`Plant Zip Code`=="44100",]$lat <- 41.4117
dt_ref[dt_ref$`Plant Zip Code`=="44100",]$lng <- -82.1278

# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Read Year Data
dt_2000 <- readRDS(here("data", "inter", "eiagenerator", "2000.rds"))
dim(dt_2000)
dt_2000$FIPS <- str_pad(dt_2000$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_2000 <- merge(gps_county, dt_2000, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_2000$NewGasDistance <- 0
dt_2000$NewCoalDistance <- 0
dt_2000$NewSunDistance <- 0
dt_2000$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_2000)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_2000[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2000$NewGasDistance[i] <- NA
  } else {
    dt_2000$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2000$NewCoalDistance[i] <- NA
  } else {
    dt_2000$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2000$NewSunDistance[i] <- NA
  } else {
    dt_2000$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2000$NewWindDistance[i] <- NA
  } else {
    dt_2000$NewWindDistance[i] <- dt_ref$dist[index] 
  }
}

dt_2000 <- dt_2000[ , -which(names(dt_2000) %in% c("clon00","clat00"))]
names(dt_2000)[names(dt_2000) == 'fips'] <- 'FIPS'
head(dt_2000)
dim(dt_2000)
saveRDS(dt_2000, here("data", "inter", "eiagenerator", "completed", "2000.rds"))
rm(list = c("dt_2000", "dt_ref", "gen", "plant"))
gc()

#2004
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "GenY04.xls"))
plant <- read_excel(here("data", "input", "eiagenerator", "PlantY04.xls"))

# Year, FIPS Columns
dt_2004 <- county_fips
dt_2004 <- dt_2004[complete.cases(dt_2004),]
dt_2004$Year <- "2004"
dt_2004 <- dt_2004[,-3]
colnames(dt_2004) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
plant <- plant[, c("UTILCODE","PLNTCODE","PLNTNAME","CNTYNAME","STATE","PLNTZIP")]
plant <- plant[complete.cases(plant),]

# Create County FIP
plant <- readRDS(here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2004.rds"))

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(ENERGY_SOURCE_1 == "BFG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "GAS" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LPG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "NG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "RG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "SNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "ANT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "BIT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "COL" ~ "Coal",
                                             ENERGY_SOURCE_1 == "CWM" ~ "Coal",
                                             ENERGY_SOURCE_1 == "LIG" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUB" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUN" ~ "Sun",
                                             ENERGY_SOURCE_1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","INSVYEAR","INSVMONTH","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = UTILCODE,
    `Plant Code` = PLNTCODE,
    `state_abbr` = STATE,
    `Plant Name` = PLNTNAME,
    `County Name` = CNTYNAME,
    `Plant Zip Code` = PLNTZIP,
    `In-Service Month` = INSVMONTH,
    `In-Service Year` = INSVYEAR
  )
head(dt_ref)

# Tweak Dataset
dt_ref <- subset(dt_ref, `In-Service Year`%in%c("2004","2003","2002","2001"))
#dt_ref <- subset(dt_ref, `In-Service Year`=='2004')
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=5, side="left", pad="0")
dt_ref$`In-Service Month` <- NULL
dt_ref$`In-Service Year` <- NULL
dt_ref <- distinct(dt_ref)
dim(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zipcode, by.x='Plant Zip Code', by.y='zip', all.x=TRUE)
dt_ref <- dt_ref %>% 
  dplyr::rename(
    lat = latitude,
    lng = longitude,
  )
subset(dt_ref, is.na(lat))

dt_ref[dt_ref$`Plant Zip Code`=="38605",]$lat <- 34.7713
dt_ref[dt_ref$`Plant Zip Code`=="38605",]$lng <- -89.1706
dt_ref[dt_ref$`Plant Zip Code`=="44100",]$lat <- 41.4117
dt_ref[dt_ref$`Plant Zip Code`=="44100",]$lng <- -82.1278
dt_ref[dt_ref$`Plant Zip Code`=="55149",]$lat <- 44.4507
dt_ref[dt_ref$`Plant Zip Code`=="55149",]$lng <- -96.3226
dt_ref[dt_ref$`Plant Zip Code`=="87058",]$lat <- 45.4122
dt_ref[dt_ref$`Plant Zip Code`=="87058",]$lng <- -120.7530

# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Read Year Data
dt_2004 <- readRDS(here("data", "inter", "eiagenerator", "2004.rds"))
dim(dt_2004)
dt_2004$FIPS <- str_pad(dt_2004$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_2004 <- merge(gps_county, dt_2004, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_2004$NewGasDistance <- 0
dt_2004$NewCoalDistance <- 0
dt_2004$NewSunDistance <- 0
dt_2004$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_2004)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_2004[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2004$NewGasDistance[i] <- NA
  } else {
    dt_2004$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2004$NewCoalDistance[i] <- NA
  } else {
    dt_2004$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2004$NewSunDistance[i] <- NA
  } else {
    dt_2004$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2004$NewWindDistance[i] <- NA
  } else {
    dt_2004$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}

dt_2004 <- dt_2004[ , -which(names(dt_2004) %in% c("clon00","clat00"))]
names(dt_2004)[names(dt_2004) == 'fips'] <- 'FIPS'
head(dt_2004)
dim(dt_2004)
saveRDS(dt_2004, here("data", "inter", "eiagenerator", "completed", "2004.rds"))
rm(list = c("dt_2004", "dt_ref", "gen", "plant"))
gc()



# 2008

# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "GenY08.xls"))

# Year, FIPS Columns
dt_2008 <- county_fips
dt_2008 <- dt_2008[complete.cases(dt_2008),]
dt_2008$Year <- "2008"
dt_2008 <- dt_2008[,-3]
colnames(dt_2008) <- c("FIPS","County","Year")

# load plant data
plant <- readRDS(here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2008.rds"))

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(ENERGY_SOURCE_1 == "BFG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "GAS" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "LPG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "NG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "RG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "SNG" ~ "Gas",
                                             ENERGY_SOURCE_1 == "ANT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "BIT" ~ "Coal",
                                             ENERGY_SOURCE_1 == "COL" ~ "Coal",
                                             ENERGY_SOURCE_1 == "CWM" ~ "Coal",
                                             ENERGY_SOURCE_1 == "LIG" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUB" ~ "Coal",
                                             ENERGY_SOURCE_1 == "SUN" ~ "Sun",
                                             ENERGY_SOURCE_1 == "WND" ~ "Wind"))
gen <- gen[,c("UTILCODE","PLNTCODE","OPERATING_YEAR","OPERATING_MONTH","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("UTILCODE","PLNTCODE"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = UTILCODE,
    `Plant Code` = PLNTCODE,
    `state_abbr` = STATE,
    `Plant Name` = PLNTNAME,
    `County Name` = COUNTY,
    `Plant Zip Code` = ZIP5,
    `In-Service Month` = OPERATING_MONTH,
    `In-Service Year` = OPERATING_YEAR
  )
head(dt_ref)

# Tweak Dataset
#dt_ref <- subset(dt_ref, `In-Service Year`=='2008')
dt_ref <- subset(dt_ref, `In-Service Year`%in%c("2008","2007","2006","2005"))
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=5, side="left", pad="0")
dt_ref$`In-Service Year` <- NULL
dt_ref$`In-Service Month` <- NULL
dt_ref <- distinct(dt_ref)
dim(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zipcode, by.x='Plant Zip Code', by.y='zip', all.x=TRUE)
dt_ref <- dt_ref %>% 
  dplyr::rename(
    lat = latitude,
    lng = longitude,
  )
subset(dt_ref, is.na(lat))
dt_ref[dt_ref$`Plant Zip Code`=="18110",]$lat <- 39.8592
dt_ref[dt_ref$`Plant Zip Code`=="18110",]$lng <- -75.0144
dt_ref[dt_ref$`Plant Zip Code`=="56124",]$lat <- 44.0243
dt_ref[dt_ref$`Plant Zip Code`=="56124",]$lng <- -95.8143
dt_ref[dt_ref$`Plant Zip Code`=="56250",]$lat <- 44.0243
dt_ref[dt_ref$`Plant Zip Code`=="56250",]$lng <- -95.8143
dt_ref[dt_ref$`Plant Zip Code`=="56383",]$lat <- 44.3516
dt_ref[dt_ref$`Plant Zip Code`=="56383",]$lng <- -95.3103
dt_ref[dt_ref$`Plant Zip Code`=="66730",]$lat <- 37.1142
dt_ref[dt_ref$`Plant Zip Code`=="66730",]$lng <- -94.8106
dt_ref[dt_ref$`Plant Zip Code`=="73906",]$lat <- 39.6930
dt_ref[dt_ref$`Plant Zip Code`=="73906",]$lng <- -100.4621


# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)

# Read Year Data
dt_2008 <- readRDS(here("data", "inter", "eiagenerator", "2008.rds"))
dim(dt_2008)
dt_2008$FIPS <- str_pad(dt_2008$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_2008 <- merge(gps_county, dt_2008, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_2008$NewGasDistance <- 0
dt_2008$NewCoalDistance <- 0
dt_2008$NewSunDistance <- 0
dt_2008$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_2008)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_2008[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2008$NewGasDistance[i] <- NA
  } else {
    dt_2008$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2008$NewCoalDistance[i] <- NA
  } else {
    dt_2008$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2008$NewSunDistance[i] <- NA
  } else {
    dt_2008$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2008$NewWindDistance[i] <- NA
  } else {
    dt_2008$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}

dt_2008 <- dt_2008[ , -which(names(dt_2008) %in% c("clon00","clat00"))]
names(dt_2008)[names(dt_2008) == 'fips'] <- 'FIPS'
head(dt_2008)
dim(dt_2008)
saveRDS(dt_2008, here("data", "inter", "eiagenerator", "completed", "2008.rds"))
rm(list = c("dt_2008", "dt_ref", "gen", "plant"))
gc()


# 2012
gen <- read_excel(here("data", "input", "eiagenerator", "GeneratorY2012.xlsx"),skip=1)

# Year, FIPS Columns
dt_2012 <- county_fips
dt_2012 <- dt_2012[complete.cases(dt_2012),]
dt_2012$Year <- "2012"
dt_2012 <- dt_2012[,-3]
colnames(dt_2012) <- c("FIPS","County","Year")

# load plant data
plant <- readRDS(here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2012.rds"))

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(`Energy Source 1` == "BFG" ~ "Gas",
                                             `Energy Source 1` == "GAS" ~ "Gas",
                                             `Energy Source 1` == "LNG" ~ "Gas",
                                             `Energy Source 1` == "LPG" ~ "Gas",
                                             `Energy Source 1` == "NG" ~ "Gas",
                                             `Energy Source 1` == "RG" ~ "Gas",
                                             `Energy Source 1` == "SNG" ~ "Gas",
                                             `Energy Source 1` == "ANT" ~ "Coal",
                                             `Energy Source 1` == "BIT" ~ "Coal",
                                             `Energy Source 1` == "COL" ~ "Coal",
                                             `Energy Source 1` == "CWM" ~ "Coal",
                                             `Energy Source 1` == "LIG" ~ "Coal",
                                             `Energy Source 1` == "SUB" ~ "Coal",
                                             `Energy Source 1` == "SUN" ~ "Sun",
                                             `Energy Source 1` == "WND" ~ "Wind"))
gen <- gen[,c("Utility ID","Plant Code","Operating Year","Operating Month","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("Utility ID","Plant Code"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)
dim(dt_ref)
# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = `Utility ID`,
    `Plant Code` = `Plant Code`,
    `state_abbr` = State,
    `Plant Name` = `Plant Name`,
    `County Name` = County,
    `Plant Zip Code` = Zip,
    `In-Service Month` = `Operating Month`,
    `In-Service Year` = `Operating Year`
  )
head(dt_ref)

# Tweak Dataset
#dt_ref <- subset(dt_ref, `In-Service Year`=='2012')
dt_ref <- subset(dt_ref, `In-Service Year`%in%c("2012","2011","2010","2009"))
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=5, side="left", pad="0")
dt_ref$`In-Service Month` <- NULL
dt_ref$`In-Service Year` <- NULL
dt_ref <- distinct(dt_ref)
dim(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zipcode, by.x='Plant Zip Code', by.y='zip', all.x=TRUE)
dt_ref <- dt_ref %>% 
  dplyr::rename(
    lat = latitude,
    lng = longitude,
  )
summary(dt_ref$lat)
summary(dt_ref$lng)
subset(dt_ref, is.na(lat))

dt_ref$lat[dt_ref$`Plant Zip Code`==67384] <- 37.6841
dt_ref$lng[dt_ref$`Plant Zip Code`==67384] <- -100.4397

dt_ref$lat[dt_ref$`Plant Zip Code`==94450] <- 37.5611
dt_ref$lng[dt_ref$`Plant Zip Code`==94450] <- -98.0614

# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)

# Read Year Data
dt_2012 <- readRDS(here("data", "inter", "eiagenerator", "2012.rds"))
dim(dt_2012)
dt_2012$FIPS <- str_pad(dt_2012$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_2012 <- merge(gps_county, dt_2012, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_2012$NewGasDistance <- 0
dt_2012$NewCoalDistance <- 0
dt_2012$NewSunDistance <- 0
dt_2012$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_2012)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_2012[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2012$NewGasDistance[i] <- NA
  } else {
    dt_2012$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2012$NewCoalDistance[i] <- NA
  } else {
    dt_2012$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2012$NewSunDistance[i] <- NA
  } else {
    dt_2012$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2012$NewWindDistance[i] <- NA
  } else {
    dt_2012$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}

dt_2012 <- dt_2012[ , -which(names(dt_2012) %in% c("clon00","clat00"))]
names(dt_2012)[names(dt_2012) == 'fips'] <- 'FIPS'
head(dt_2012)
dim(dt_2012)
saveRDS(dt_2012, here("data", "inter", "eiagenerator", "completed", "2012.rds"))
rm(list = c("dt_2012", "dt_ref", "gen", "plant"))
gc()

#2016
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "3_1_Generator_Y2016.xlsx"), skip = 1)

# Year, FIPS Columns
dt_2016 <- county_fips
dt_2016 <- dt_2016[complete.cases(dt_2016),]
dt_2016$Year <- "2016"
dt_2016 <- dt_2016[,-3]
colnames(dt_2016) <- c("FIPS","County","Year")

# load plant data
plant <- readRDS(here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2016.rds"))
dim(plant)

# Gas, Wind, or Solar?
gen <- gen %>% mutate(TypeofFuel = case_when(`Energy Source 1` == "BFG" ~ "Gas",
                                             `Energy Source 1` == "GAS" ~ "Gas",
                                             `Energy Source 1` == "LNG" ~ "Gas",
                                             `Energy Source 1` == "LPG" ~ "Gas",
                                             `Energy Source 1` == "NG" ~ "Gas",
                                             `Energy Source 1` == "RG" ~ "Gas",
                                             `Energy Source 1` == "SNG" ~ "Gas",
                                             `Energy Source 1` == "ANT" ~ "Coal",
                                             `Energy Source 1` == "BIT" ~ "Coal",
                                             `Energy Source 1` == "COL" ~ "Coal",
                                             `Energy Source 1` == "CWM" ~ "Coal",
                                             `Energy Source 1` == "LIG" ~ "Coal",
                                             `Energy Source 1` == "SUB" ~ "Coal",
                                             `Energy Source 1` == "SUN" ~ "Sun",
                                             `Energy Source 1` == "WND" ~ "Wind"))
gen <- gen[,c("Utility ID","Plant Code","Operating Year","Operating Month","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("Utility ID","Plant Code"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = `Utility ID`,
    `Plant Code` = `Plant Code`,
    `state_abbr` = State,
    `Plant Name` = `Plant Name`,
    `County Name` = County,
    `Plant Zip Code` = Zip,
    `In-Service Month` = `Operating Month`,
    `In-Service Year` = `Operating Year`
  )
head(dt_ref)

# Tweak Dataset
dt_ref <- subset(dt_ref, `In-Service Year`=='2016')
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=5, side="left", pad="0")
dt_ref <- distinct(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zipcode, by.x='Plant Zip Code', by.y='zip', all.x=TRUE)
dt_ref <- dt_ref %>% 
  dplyr::rename(
    lat = latitude,
    lng = longitude,
  )
subset(dt_ref, is.na(lat))
dt_ref$lat[dt_ref$`Plant Zip Code`==89919] <- 36.0796
dt_ref$lng[dt_ref$`Plant Zip Code`==89919] <- -115.0940

# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)

# Read Year Data
dt_2016 <- readRDS(here("data", "inter", "eiagenerator", "2016.rds"))
dim(dt_2016)
dt_2016$FIPS <- str_pad(dt_2016$FIPS, width=5, side="left", pad="0")

# Geocode Counties
dt_2016 <- merge(gps_county, dt_2016, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_2016$NewGasDistance <- 0
dt_2016$NewCoalDistance <- 0
dt_2016$NewSunDistance <- 0
dt_2016$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_2016)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_2016[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2016$NewGasDistance[i] <- NA
  } else {
    dt_2016$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2016$NewCoalDistance[i] <- NA
  } else {
    dt_2016$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2016$NewSunDistance[i] <- NA
  } else {
    dt_2016$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2016$NewWindDistance[i] <- NA
  } else {
    dt_2016$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}

dt_2016 <- dt_2016[ , -which(names(dt_2016) %in% c("clon00","clat00"))]
names(dt_2016)[names(dt_2016) == 'fips'] <- 'FIPS'
head(dt_2016)
dim(dt_2016)
saveRDS(dt_2016, here("data", "inter", "eiagenerator", "completed", "2016.rds"))
rm(list = c("dt_2016", "dt_ref", "gen", "plant"))
gc()


# 2020----
# Read Files
gen <- read_excel(here("data", "input", "eiagenerator", "3_1_Generator_Y2020.xlsx"))

# Year, FIPS Columns
dt_2020 <- county_fips
dt_2020 <- dt_2020[complete.cases(dt_2020),]
dt_2020$Year <- "2020"
dt_2020 <- dt_2020[,-3]
colnames(dt_2020) <- c("FIPS","County","Year")

# Remove Unnecessary Columns in plant
plant <- readRDS(here("data", "inter", "eiagenerator", "crosswalk", "plant_fuzzy_2020.rds"))

# Gas, Wind, or Solar?
head(gen)
gen <- gen %>% mutate(TypeofFuel = case_when(`Energy Source 1` == "BFG" ~ "Gas",
                                             `Energy Source 1` == "GAS" ~ "Gas",
                                             `Energy Source 1` == "LNG" ~ "Gas",
                                             `Energy Source 1` == "LPG" ~ "Gas",
                                             `Energy Source 1` == "NG" ~ "Gas",
                                             `Energy Source 1` == "RG" ~ "Gas",
                                             `Energy Source 1` == "SNG" ~ "Gas",
                                             `Energy Source 1` == "ANT" ~ "Coal",
                                             `Energy Source 1` == "BIT" ~ "Coal",
                                             `Energy Source 1` == "COL" ~ "Coal",
                                             `Energy Source 1` == "CWM" ~ "Coal",
                                             `Energy Source 1` == "LIG" ~ "Coal",
                                             `Energy Source 1` == "SUB" ~ "Coal",
                                             `Energy Source 1` == "SUN" ~ "Sun",
                                             `Energy Source 1` == "WND" ~ "Wind"))
gen <- gen[,c("Utility ID","Plant Code","Operating Year","Operating Month","TypeofFuel")]

# Merge into one dataset
dt_ref <- merge(plant, gen, by=c("Utility ID","Plant Code"))
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)

# Rename Dataset
dt_ref <- dt_ref %>% 
  dplyr::rename(
    `Utility Code` = `Utility ID`,
    `Plant Code` = `Plant Code`,
    `state_abbr` = State,
    `Plant Name` = `Plant Name`,
    `County Name` = County,
    `Plant Zip Code` = Zip,
    `In-Service Month` = `Operating Month`,
    `In-Service Year` = `Operating Year`
  )
head(dt_ref)

# Tweak Dataset
dt_ref <- subset(dt_ref, `In-Service Year`=='2020')
dt_ref$`Plant Zip Code` <- str_pad(dt_ref$`Plant Zip Code`, width=5, side="left", pad="0")
dt_ref <- distinct(dt_ref)

# Geocode ZIP Code
dt_ref <- merge(dt_ref, zipcode, by.x='Plant Zip Code', by.y='zip', all.x=TRUE)
dt_ref <- dt_ref %>% 
  dplyr::rename(
    lat = latitude,
    lng = longitude,
  )

dt_ref$lat[dt_ref$`Plant Zip Code`==27089] <- 35.91  
dt_ref$lng[dt_ref$`Plant Zip Code`==27089] <- -77.6
dt_ref$lat[dt_ref$`Plant Zip Code`==67821] <- 40.14 
dt_ref$lng[dt_ref$`Plant Zip Code`==67821] <- -88.2
dt_ref$lat[dt_ref$`Plant Zip Code`=="09363"] <- 37.2519 
dt_ref$lng[dt_ref$`Plant Zip Code`=="09363"] <- -119.6963


# Remove Unnecessary Columns
dt_ref <- dt_ref[,c("Utility Code","Plant Code","FIPS","TypeofFuel","lat","lng")]
dt_ref <- dt_ref[complete.cases(dt_ref),]
head(dt_ref)

# Read Year Data
dt_2020 <- readRDS(here("data", "inter", "eiagenerator", "2020.rds"))
dt_2020$FIPS <- str_pad(dt_2020$FIPS, width=5, side="left", pad="0")
dim(dt_2020)

# Geocode Counties
dt_2020 <- merge(gps_county, dt_2020, by.x="fips", by.y="FIPS", all.y=TRUE)

dt_2020$NewGasDistance <- 0
dt_2020$NewCoalDistance <- 0
dt_2020$NewSunDistance <- 0
dt_2020$NewWindDistance <- 0

## Find Distance (in METERS)
for (i in 1:nrow(dt_2020)) {
  
  dt_ref$dist <- distVincentyEllipsoid(dt_2020[i,c('clon00','clat00')], dt_ref[,c('lng','lat')])
  
  ## NEW GAS DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Gas", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2020$NewGasDistance[i] <- NA
  } else {
    dt_2020$NewGasDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW COAL DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Coal", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2020$NewCoalDistance[i] <- NA
  } else {
    dt_2020$NewCoalDistance[i] <- dt_ref$dist[index] 
  }
  
  ## NEW SUN DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Sun", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2020$NewSunDistance[i] <- NA
  } else {
    dt_2020$NewSunDistance[i] <- dt_ref$dist[index] 
  } 
  
  ## NEW WIND DISTANCE
  index <- as.numeric(which.min(ifelse(dt_ref$TypeofFuel=="Wind", dt_ref$dist, NA)))
  if (length(index)==0) {
    dt_2020$NewWindDistance[i] <- NA
  } else {
    dt_2020$NewWindDistance[i] <- dt_ref$dist[index] 
  }
  
}

dt_2020 <- dt_2020[ , -which(names(dt_2020) %in% c("clon00","clat00"))]
names(dt_2020)[names(dt_2020) == 'fips'] <- 'FIPS'
head(dt_2020)
dim(dt_2020)
saveRDS(dt_2020, here("data", "inter", "eiagenerator", "completed", "2020.rds"))
rm(list = c("dt_2020", "dt_ref", "gen", "plant"))
gc()
